########################################################################################
### Replication file for Congressional Representation by Petition:                   ###
### Assessing the Voices of the Voteless in a Comprehensive New Database, 1789-1949  ###
###                                                                                  ###
### Authors: Maggie Blackhawk, Daniel Carpenter, Tobias Resch and Benjamin Schneer   ###
###                                                                                  ###
### File: contours_replication1.R                                                    ###
########################################################################################


# Clear the workspace so the script starts from a clean session
rm(list=ls())

#Set directory to location where replication files are stored
setwd("~/replication")

# All figures and tables below are written into figs/; create it if missing
dir.create("figs", showWarnings = FALSE)

#load packages

#install.packages(c("data.table","readstata13","imputeTS","xtable","tm","topicmodels","SnowballC"))

# library() stops immediately when a package is not installed, whereas
# require() only returns FALSE and lets the script fail later on.
library(data.table)
library(readstata13)
library(imputeTS)
library(xtable)
library(tm)
library(topicmodels)

# Penalize scientific notation when numbers are printed
options(scipen=5)


#Load petitions data (line by line data and summary data)
# The sections below use congress.topic.summary and congress.final, which are
# expected to come from this file.

load("data/petitions.RData")



################
### Figure 2 ###
################

# Collapse petitions by topic and chamber of congress
out <- congress.topic.summary[, .(petitions.chamber = sum(N)), by = .(topic_broad, chamber)]

# Total petitions per topic (both chambers combined)
out[, petitions := sum(petitions.chamber), by = topic_broad]

setorder(out, petitions, chamber)

# Reshape to wide: one row per chamber, one column per broad topic
out2 <- dcast(out[!is.na(topic_broad)], chamber ~ topic_broad, value.var = "petitions.chamber")

# Column order: topics sorted by number of House petitions, largest first
topic.order <- sort(as.numeric(out2[chamber == "house", 2:ncol(out2), with = FALSE]), index.return = TRUE, decreasing = TRUE)$ix

# Flip the chamber rows so that the first bar of every pair matches the first
# bar colour below. The row names are flipped with the same index so that they
# keep describing the rows they are attached to.
chamber.rows <- 2:1
out3 <- as.matrix(out2[chamber.rows, 2:ncol(out2), with = FALSE])[, topic.order]
rownames(out3) <- out2$chamber[chamber.rows]

plot.label <- colnames(out3)

# Break labels longer than 30 characters onto two lines: after every " / "
# when the label contains a slash, otherwise at the first space.
long.label <- nchar(plot.label) > 30
wrapped.label <- ifelse(grepl("/", plot.label), gsub(" / ", " /\n", plot.label), sub(" ", "\n", plot.label))
bar.names <- ifelse(long.label, wrapped.label, plot.label)

# Create barplot

png(file = "figs/fig2.png")
par(mar = c(5, 8, 4, 2) + 0.1, mgp = c(3, 0.5, 0))
barplot(out3, beside = TRUE, main = "Petition Topics (Broad), 1789--1947", horiz = TRUE, names.arg = bar.names, cex.names = .5, las = 1, ylab = "", col = c("gray", "black"))
# Legend colours mirror the bar colours: second matrix row black, first gray
legend("topright", bty = "n", c("House", "Senate"), fill = c("black", "gray"))
dev.off()

################
### Figure 1 ###
################

#Create plot of petitions over time

pt <- congress.topic.summary[, .(petitions = sum(N)), by = .(chamber, year)]

# lines() connects points in row order, and a grouped aggregation returns rows
# in order of first appearance, so sort chronologically within each chamber.
setorder(pt, chamber, year)

house <- pt[chamber == "house"]
senate <- pt[chamber == "senate"]

png(file = "figs/fig1a.png")

# Empty frame whose axis ranges are taken from the House series
plot(x = house$year, y = house$petitions, main = "Total Petitions over time 1789--1947", type = "n", xlab = "Year", ylab = "Petitions")
lines(x = house$year, y = house$petitions)
lines(x = senate$year, y = senate$petitions, lty = 2, col = "blue")
text(x = 1900, y = 2400, labels = "Senate", col = "blue")
text(x = 1800, y = 900, labels = "House")
dev.off()

# Create plot of petitions over time normalized by US population

load("data/us_pop.RData")

# Attach the population of each year to every chamber-year row; the result
# keeps the (sorted) row order of pt.
pt <- us.pop[pt, on = "year"]

pt[, petitions.per.10k := petitions / pop * 10000]

house <- pt[chamber == "house"]
senate <- pt[chamber == "senate"]

png(file = "figs/fig1b.png")

# Empty frame whose axis ranges are taken from the House series
plot(x = house$year, y = house$petitions.per.10k, main = "Total Petitions over time 1789--1947", type = "n", xlab = "Year", ylab = "Petitions per 10K Pop.")
text(x = 1838, y = 5, labels = "Anti-Slavery", pos = 2, cex = .75)
text(x = 1891, y = 2.7, labels = "Anti-Sabbatarianism", pos = 4, cex = .75)
lines(x = house$year, y = house$petitions.per.10k)
lines(x = senate$year, y = senate$petitions.per.10k, lty = 2, col = "blue")
text(x = 1900, y = 1.75, labels = "House")
text(x = 1800, y = 0.25, labels = "Senate", col = "blue")
dev.off()

#################
### Figure A4 ###
#################

#Categories over time

# Petitions per year and broad topic, normalized by US population
pty <- congress.topic.summary[, .(petitions = sum(N)), by = .(year, topic_broad)]
pty <- us.pop[pty, on = "year"]

pty[, petitions.per.10k := petitions / pop * 10000]

# Organize topics for multiple plots (by topic)
# Keying on year as well keeps every topic's series in chronological order,
# which the line plots below rely on.
setkey(pty, topic_broad, year)

# Rank topics by their average normalized volume (ascending)
topic.sort <- pty[, .(ave.y = mean(petitions.per.10k, na.rm = TRUE)), by = topic_broad][!is.na(topic_broad)]
setkey(topic.sort, ave.y)

# Four topics per page (2 x 2 panels); the last page may hold fewer
topic.sort[, group := ceiling(seq_len(.N) / 4)]

pty <- topic.sort[pty, on = "topic_broad"][!is.na(topic_broad)]

# Break a title longer than 30 characters onto two lines: after every " / "
# when it contains a slash, otherwise at the first space.
wrap.title <- function(label) {
  if (nchar(label) <= 30) {
    return(label)
  }
  if (grepl("/", label)) {
    gsub(" / ", " /\n", label)
  } else {
    sub(" ", "\n", label)
  }
}

# Write one PNG per topic group, each holding up to four per-topic time series.
#   topic.data  - table with columns group, topic_broad, year, petitions.per.10k
#   file.prefix - output files are named figs/<file.prefix>_<group>.png
#   y.limits    - common y-axis range for all panels; NULL lets each panel
#                 choose its own range
plot.topic.pages <- function(topic.data, file.prefix, y.limits = NULL) {
  for (page in unique(topic.data$group)) {
    png(file = paste0("figs/", file.prefix, "_", page, ".png"))

    par(mfrow = c(2, 2))

    for (page.topic in unique(topic.data[group == page, topic_broad])) {
      panel <- topic.data[topic_broad == page.topic]
      plot(x = panel$year, y = panel$petitions.per.10k, main = wrap.title(page.topic), type = "l", xlab = "Year", ylab = "Petitions per 10K Pop.", ylim = y.limits)
    }
    dev.off()
  }
  invisible(NULL)
}

# Allow y - axis to vary
plot.topic.pages(pty, "figa4")


#################
### Figure A5 ###
#################

# Restrict Y axis to be the same for every plot
plot.topic.pages(pty, "figa5", y.limits = c(0, 1.55))

#################
### Table A1  ###
#################

# This table illustrates frequent terms in petitions by category

# Create corpus based on text of petition prayers (one document per row of
# congress.final, in row order)
petcorp <- congress.final[, Corpus(VectorSource(prayer))]

# Pre-Process text data
petcorp <- tm_map(petcorp, removeNumbers)
petcorp <- tm_map(petcorp, removePunctuation)
petcorp <- tm_map(petcorp, content_transformer(tolower))
petcorp <- tm_map(petcorp, removeWords, stopwords("english"))
petcorp <- tm_map(petcorp, stemDocument, language = "english")
# Strip every occurrence of "pray" from the stemmed text.
# NOTE(review): this call used to be wrapped around gsub("", "", x), which
# changes nothing; that pattern may have lost a character to an encoding
# problem -- confirm against the authors' copy.
petcorp <- tm_map(petcorp, content_transformer(function(x) gsub("pray", "", x)))
petcorp <- tm_map(petcorp, stripWhitespace)


# Document-term matrix: drop very sparse terms, then weight by TF-IDF
adtm <- DocumentTermMatrix(petcorp)
adtm <- removeSparseTerms(adtm, .999)
adtm <- weightTfIdf(adtm, normalize = TRUE)


# Loop to create list of key terms -- we take the top 10 terms
keywords <- list()      # top 10 terms, one entry per broad topic
keywords.n <- list()    # top 10 term scores, one entry per detailed topic
broad.scores <- list()  # cache of the top 10 scores per broad topic

topics <- unique(congress.final[, .(topic, topic_broad)])[!is.na(topic_broad)]

for (j in seq_len(nrow(topics))) {
  topic0 <- topics[j, topic]
  topic_broad0 <- topics[j, topic_broad]
  cache.key <- as.character(topic_broad0)

  # The scores depend only on the broad topic, so compute them once per broad
  # topic instead of once per detailed topic.
  if (is.null(broad.scores[[cache.key]])) {
    # Rows of congress.final (and therefore of adtm) in this broad topic
    p <- congress.final[, which(topic_broad == topic_broad0)]
    term.score <- apply(adtm[p, ], 2, sum)
    terms <- term.score[order(term.score, decreasing = TRUE)]
    broad.scores[[cache.key]] <- terms[1:10]
    keywords[[topic_broad0]] <- names(terms)[1:10]
  }

  keywords.n[[topic0]] <- broad.scores[[cache.key]]
}

# One column per broad topic, ten rows of terms
out <- as.data.table(keywords)

print(xtable(out), include.rownames = TRUE, type = "html", file = "figs/taba1.html")

